from pyspark.sql import SparkSession

# Driver
# Build (or reuse) a local SparkSession — the entry point to DataFrame APIs.
spark = SparkSession \
    .builder \
    .master('local') \
    .appName('HelloSpark') \
    .getOrCreate()

# Create a tiny RDD of (name, age) tuples and convert it to a DataFrame
# with named columns.
rdd = spark.sparkContext.parallelize([('tom', 20), ('jack', 18)])
df = rdd.toDF(['name', 'age'])

df.printSchema()  # print the inferred schema
df.show(truncate=False)  # print the rows without truncating cell values

# Stop the session to release driver/executor resources — the original
# script leaked these by never calling stop().
spark.stop()
